{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b2d6451b-02c9-4ac4-8ecd-eeab4c0c62ac",
   "metadata": {},
   "source": [
    "### 版本信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59607366-f50d-4a12-a6f5-8f4d99a1b0e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import transformers\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "import torch\n",
    "\n",
    "# 打印版本号\n",
    "print(\"transformers version:\", transformers.__version__)\n",
    "print(\"torch version:\", torch.__version__)\n",
    "\n",
    "# 检查系统中是否有可用的 GPU\n",
    "if torch.cuda.is_available():\n",
    "    # 获取可用的 GPU 设备数量\n",
    "    num_devices = torch.cuda.device_count()\n",
    "    print(\"可用 GPU 数量:\", num_devices)\n",
    "\n",
    "    # 遍历所有可用的 GPU 设备并打印详细信息\n",
    "    for i in range(num_devices):\n",
    "        device = torch.cuda.get_device_properties(i)\n",
    "        print(f\"\\nGPU {i} 的详细信息:\")\n",
    "        print(\"名称:\", device.name)\n",
    "        print(\"计算能力:\", f\"{device.major}.{device.minor}\")\n",
    "        print(\"内存总量 (GB):\", round(device.total_memory / (1024**3), 1))\n",
    "else:\n",
    "    print(\"没有可用的 GPU\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac275fe8-0db3-40b4-a1da-727ede64bca0",
   "metadata": {},
   "source": [
    "### FP16显存占用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88b66c08-79a8-40fe-ba87-25a759999e6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载模型\n",
    "model_name = \"/path/to/llama-2-7b-hf\" # 你模型存放的位置\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"cuda:0\", torch_dtype=torch.float16)\n",
    "# 模型总参数\n",
    "total_parameters = model.num_parameters()\n",
    "print(\"Total parameters in the model:\", total_parameters)\n",
    "\n",
    "# 计算结果\n",
    "size_per_parameter_bytes = 2\n",
    "# 计算模型在显存中的总空间（以字节为单位）\n",
    "total_memory_bytes = total_parameters * size_per_parameter_bytes\n",
    "# 将字节转换为更常见的单位（GB）\n",
    "total_memory_gb = total_memory_bytes / (1024**3)\n",
    "print(\"Total memory occupied by the model in MB:\", total_memory_gb)\n",
    "\n",
    "# torch显示结果\n",
    "memory_allocated = torch.cuda.memory_allocated(device='cuda:0')\n",
    "# 将字节转换为更常见的单位（GB）\n",
    "memory_allocated_gb = memory_allocated / (1024**3)\n",
    "print(\"Memory allocated by the model in GB:\", memory_allocated_gb)\n",
    "\n",
    "# 显卡显示结果\n",
    "nvidia-smi"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6dde2aff-53d4-487f-a68a-8d8692979a1b",
   "metadata": {},
   "source": [
    "### FP32显存占用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f9522a8-7ff5-4201-8bf3-d599dc8a94f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 和上述是一模一样的代码，就是两个地方不一样\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"cuda:0\", torch_dtype=torch.float32)\n",
    "...\n",
    "size_per_parameter_bytes = 4"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8015c4de-29c3-4f26-abde-e42c33d49dc0",
   "metadata": {},
   "source": [
    "### BF16显存占用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7414a663-a4b4-485b-95fc-48a4015a279b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 和上述是一模一样的代码，就是两个地方不一样\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"cuda:0\", torch_dtype=torch.bfloat16)\n",
    "...\n",
    "size_per_parameter_bytes = 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ca433d2-b8b2-4511-b341-347f57e1d4e6",
   "metadata": {},
   "source": [
    "### 模型精度转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78f675c7-2516-4e19-95fd-790037e08d66",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 以float32加载\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"cuda:0\", torch_dtype=torch.float32)\n",
    "# 计算模型的显存占用\n",
    "memory_allocated = torch.cuda.memory_allocated(device='cuda:0')\n",
    "# 将字节转换为更常见的单位（GB）\n",
    "memory_allocated_gb = memory_allocated / (1024**3)\n",
    "print(\"Memory allocated by the model in GB:\", memory_allocated_gb)\n",
    "\n",
    "# 转为float16\n",
    "model.half()\n",
    "# 计算模型的显存占用\n",
    "memory_allocated = torch.cuda.memory_allocated(device='cuda:0')\n",
    "# 将字节转换为更常见的单位（GB）\n",
    "memory_allocated_gb = memory_allocated / (1024**3)\n",
    "print(\"Memory allocated by the model in GB:\", memory_allocated_gb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "711f44be-4908-4975-9fd2-640339e9afd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 以float16加载\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"cuda:0\", torch_dtype=torch.float16)\n",
    "# 计算模型的显存占用\n",
    "memory_allocated = torch.cuda.memory_allocated(device='cuda:0')\n",
    "# 将字节转换为更常见的单位（GB）\n",
    "memory_allocated_gb = memory_allocated / (1024**3)\n",
    "print(\"Memory allocated by the model in GB:\", memory_allocated_gb)\n",
    "\n",
    "# 转为float16\n",
    "model.float()\n",
    "# 计算模型的显存占用\n",
    "memory_allocated = torch.cuda.memory_allocated(device='cuda:0')\n",
    "# 将字节转换为更常见的单位（GB）\n",
    "memory_allocated_gb = memory_allocated / (1024**3)\n",
    "print(\"Memory allocated by the model in GB:\", memory_allocated_gb)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3d51a524-9dfa-4f5e-b9c2-48486cd3713d",
   "metadata": {},
   "source": [
    "### 如何转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b87b4f9e-e46d-458c-b601-aaa62e286662",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original Tensor:\n",
      " tensor([3.1400])\n",
      "Half-Precision Tensor:\n",
      " tensor([3.1406], dtype=torch.float16)\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "# 创建一个单精度浮点数的张量\n",
    "float_tensor = torch.tensor([3.14], dtype=torch.float32)\n",
    "# 将张量转换为半精度浮点数\n",
    "half_tensor = float_tensor.half()\n",
    "# 打印转换后的张量及其数据类型\n",
    "print(\"Original Tensor:\\n\", float_tensor)\n",
    "print(\"Half-Precision Tensor:\\n\", half_tensor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "040f3c1d-20f5-42f1-b3d6-e5dd69aefafc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
